# Using Python's built-in urllib library for web scraping
from urllib import request
import re

# Fetch the Baidu News front page and print every matching link as
# "<link text>:<href>".

# Prepare the request object.
url = "http://news.baidu.com"
req = request.Request(url)

# Use the response as a context manager so it is closed even when reading or
# decoding raises; the timeout keeps the script from blocking forever on an
# unresponsive server (urlopen raises instead of hanging).
with request.urlopen(req, timeout=10) as res:
    html = res.read().decode("UTF-8")

# Capture (href, link text) pairs from anchors that carry a `mon` attribute
# and target="_blank". Non-greedy groups keep each match within one tag.
pat = r'<a href="(.*?)" mon=".*?" target="_blank">(.*?)</a>'
dlist = re.findall(pat, html)

for href, title in dlist:
    # Skip entries whose text is the literal '#{title}' -- presumably an
    # unrendered template placeholder in the page source (TODO confirm).
    if title != '#{title}':
        print(title + ":" + href)
